| 1 | import pandas as pd |
| 2 | import wikidata_plain_sparql as wikidata |
| 3 | |
| 4 | from bokeh.palettes import turbo |
| 5 | from bokeh.plotting import figure, output_notebook, show, gridplot |
| 6 | |
| 7 | # set bokeh output mode to notebook |
| 8 | output_notebook() |
| 9 | |
| 10 | from helper import get_jhu_cached, create_grid |
| 1 | # get all dates from earliest available data until yesterday |
| 2 | all_dates = pd.date_range(start='2020-01-22', end='today')[:-1] |
| 3 | |
| 4 | # standardize column names for all entries |
| 5 | def rename_columns(column): |
| 6 | column_map = { |
| 7 | 'Lat': 'Latitude', |
| 8 | 'Long_': 'Longitude', |
| 9 | 'Incidence_Rate': 'Incident_Rate' |
| 10 | } |
| 11 | if column in column_map: |
| 12 | return column_map[column] |
| 13 | return column.replace('/', '_').replace('-', '_').replace(' ', '_') |
| 14 | |
| 15 | # load data from all dates |
| 16 | all_data = [] |
| 17 | for date in all_dates: |
| 18 | data = pd.read_csv(get_jhu_cached(date)) |
| 19 | data.rename(columns=rename_columns, inplace=True) |
| 20 | data['Date'] = date |
| 21 | all_data.append(data) |
| 22 | |
| 23 | # combine data from all days |
| 24 | raw_data = pd.concat(all_data) |
| 1 | # remove cruise ships from countries |
| 2 | exclude_countries = [ |
| 3 | 'MS Zaandam', |
| 4 | 'Diamond Princess', |
| 5 | 'Cruise Ship' |
| 6 | ] |
| 7 | |
| 8 | raw_data = raw_data[~raw_data['Country_Region'].isin(exclude_countries)] |
| 9 | |
| 10 | # standardize country names for all entries |
| 11 | country_mapping = { |
| 12 | 'Hong Kong.+': 'Hong Kong', |
| 13 | 'Iran.+': 'Iran', |
| 14 | '.*Congo.*': 'Congo', |
| 15 | 'Mainland China': 'China', |
| 16 | '.*Bahamas.*': 'The Bahamas', |
| 17 | '.*Gambia.*': 'The Gambia', |
| 18 | 'Viet Nam': 'Vietnam', |
| 19 | 'Taiwan\*': 'Taiwan', |
| 20 | 'Cote d\'Ivoire': 'Ivory Coast', |
| 21 | 'Cabo Verde': 'Cape Verde', |
| 22 | 'Russian Federation': 'Russia', |
| 23 | ' Azerbaijan': 'Azerbaijan', |
| 24 | 'Holy See': 'Vatican City', |
| 25 | 'Republic of Ireland': 'Ireland', |
| 26 | 'Republic of Moldova': 'Moldova', |
| 27 | 'Czechia': 'Czech Republic', |
| 28 | 'Republic of Korea|Korea, South': 'South Korea', |
| 29 | 'Timor-Leste': 'East Timor', |
| 30 | 'Macao SAR|Macau': 'Macao', |
| 31 | 'UK': 'United Kingdom', |
| 32 | 'Jersey|Guernsey': 'Channel Islands', |
| 33 | 'Dominica': 'Dominican Republic' |
| 34 | } |
| 35 | |
| 36 | raw_data['Country_Region'] = raw_data['Country_Region'].replace(to_replace=country_mapping.keys(), value=country_mapping.values(), regex=True) |
| 1 | # group data by country |
| 2 | updates_per_country = raw_data.groupby(['Country_Region', 'Date']).agg( |
| 3 | Confirmed = ('Confirmed','sum'), |
| 4 | Deaths = ('Deaths','sum'), |
| 5 | ).reset_index() |
| 6 | |
| 7 | # get all cantons |
| 8 | all_countries = updates_per_country['Country_Region'].unique() |
| 9 | all_countries.sort() |
| 10 | |
| 11 | # calculate difference between days |
| 12 | for country in all_countries: |
| 13 | updates_for_country = updates_per_country.loc[updates_per_country['Country_Region'] == country] |
| 14 | previous_index = None |
| 15 | total_cases = 0 |
| 16 | for index in updates_for_country.index: |
| 17 | if previous_index != None: |
| 18 | new_cases = updates_per_country.at[index, 'Confirmed'] - updates_per_country.at[previous_index, 'Confirmed'] |
| 19 | updates_per_country.at[index, 'New_Cases'] = new_cases |
| 20 | total_cases += new_cases |
| 21 | updates_per_country.at[index, 'Total_Cases'] = total_cases |
| 22 | updates_per_country.at[index, 'New_Deaths'] = updates_per_country.at[index, 'Deaths'] - updates_per_country.at[previous_index, 'Deaths'] |
| 23 | previous_index = index |
| 24 | |
| 25 | updates_per_country = updates_per_country[updates_per_country['Date'] >= '2020-01-23'] |
| 26 | |
| 27 | worldwide_pretty = updates_per_country.loc[:, ['Date', 'Country_Region', 'New_Cases', 'Total_Cases', 'New_Deaths']] |
| 1 | # calculate new cases |
| 2 | new_cases_graph = figure(title="New COVID-19 cases per country", y_axis_label='new cases', x_axis_type='datetime', sizing_mode='stretch_width') |
| 3 | palette = turbo(all_countries.size) |
| 4 | i = 0 |
| 5 | for country in all_countries: |
| 6 | updates_for_country = updates_per_country.loc[updates_per_country['Country_Region'] == country] |
| 7 | new_cases_graph.line(updates_for_country['Date'], updates_for_country['New_Cases'], line_color=palette[i], legend_label=country, line_width=2) |
| 8 | i += 1 |
| 9 | show(new_cases_graph) |
| 1 | # show total cases |
| 2 | total_cases_graph = figure(title="Total COVID-19 cases per country", y_axis_label='total cases', x_axis_type='datetime', sizing_mode='stretch_width') |
| 3 | palette = turbo(all_countries.size) |
| 4 | i = 0 |
| 5 | for country in all_countries: |
| 6 | updates_for_country = updates_per_country.loc[updates_per_country['Country_Region'] == country] |
| 7 | total_cases_graph.line(updates_for_country['Date'], updates_for_country['Total_Cases'], line_color=palette[i], legend_label=country, line_width=2) |
| 8 | i += 1 |
| 9 | show(total_cases_graph) |
| 1 | # get population data from WikiData |
| 2 | canton_data = wikidata.query(''' |
| 3 | SELECT ?shortCode ?population ?canton WHERE { |
| 4 | ?canton wdt:P31 wd:Q23058. |
| 5 | ?canton wdt:P300 ?shortCode. |
| 6 | OPTIONAL { |
| 7 | ?canton p:P1082 ?population_stmt. |
| 8 | ?population_stmt ps:P1082 ?population. |
| 9 | ?population_stmt pq:P585 ?population_date. |
| 10 | } |
| 11 | FILTER NOT EXISTS { |
| 12 | ?canton p:P1082/pq:P585 ?population_date_. |
| 13 | FILTER (?population_date_ > ?population_date) |
| 14 | } |
| 15 | } |
| 16 | ORDER BY ?shortCode |
| 17 | ''') |
| 18 | canton_data.set_index('shortCode', inplace=True) |
| 1 | raw_data = pd.read_csv('https://raw.githubusercontent.com/openZH/covid_19/master/COVID19_Fallzahlen_CH_total_v2.csv') |
| 2 | |
| 3 | # convert to date |
| 4 | raw_data['date'] = pd.to_datetime(raw_data['date']) |
| 5 | |
| 6 | # remove FL |
| 7 | swiss = raw_data[raw_data['abbreviation_canton_and_fl'] != 'FL'] |
| 8 | |
| 9 | # only use data after 1st of june |
| 10 | swiss = swiss[swiss['date'] >= '2020-05-31'] |
| 11 | |
| 12 | # only keep useful entries |
| 13 | conf_cases = swiss.loc[-swiss['ncumul_conf'].isna()].copy() |
| 14 | |
| 15 | # get all cantons |
| 16 | all_cantons = conf_cases['abbreviation_canton_and_fl'].unique() |
| 17 | all_cantons.sort() |
| 18 | |
| 19 | # calculate new cases |
| 20 | conf_cases['new_cases'] = 0 |
| 21 | |
| 22 | for canton in all_cantons: |
| 23 | updates_for_canton = conf_cases.loc[conf_cases['abbreviation_canton_and_fl'] == canton] |
| 24 | previous_index = None |
| 25 | total_cases = 0 |
| 26 | for index in updates_for_canton.index: |
| 27 | if previous_index != None: |
| 28 | new_cases = conf_cases.at[index, 'ncumul_conf'] - conf_cases.at[previous_index, 'ncumul_conf'] |
| 29 | conf_cases.at[index, 'new_cases'] = new_cases |
| 30 | conf_cases.at[index, 'new_cases_relative'] = new_cases / int(canton_data.at['CH-' + canton, 'population']) * 100000 |
| 31 | total_cases += new_cases |
| 32 | conf_cases.at[index, 'total_cases'] = total_cases |
| 33 | conf_cases.at[index, 'total_cases_relative'] = total_cases / int(canton_data.at['CH-' + canton, 'population']) * 100000 |
| 34 | conf_cases.at[index, 'new_deaths'] = conf_cases.at[index, 'ncumul_deceased'] - conf_cases.at[previous_index, 'ncumul_deceased'] |
| 35 | previous_index = index |
| 36 | |
| 37 | conf_cases = conf_cases[conf_cases['date'] >= '2020-06-01'] |
| 38 | |
| 39 | swiss_pretty = conf_cases.loc[:, ['date', 'abbreviation_canton_and_fl', 'new_cases', 'total_cases', 'new_deaths']] |
| 1 | # calculate new cases |
| 2 | graphs = [] |
| 3 | max_new_cases = conf_cases['new_cases_relative'].max() |
| 4 | for canton in all_cantons: |
| 5 | update_for_canton = conf_cases.loc[conf_cases['abbreviation_canton_and_fl'] == canton].copy() |
| 6 | |
| 7 | update_for_canton['new_cases_relative_avg'] = update_for_canton['new_cases_relative'].rolling(window=7).mean() |
| 8 | |
| 9 | new_cases_graph = figure(title=canton, y_axis_label='new cases', y_range=[0, max_new_cases], x_axis_type='datetime') |
| 10 | new_cases_graph.line(update_for_canton['date'], update_for_canton['new_cases_relative'], line_width=1) |
| 11 | new_cases_graph.line(update_for_canton['date'], update_for_canton['new_cases_relative_avg'], line_color='red', line_width=1) |
| 12 | graphs.append(new_cases_graph) |
| 13 | |
| 14 | show(create_grid(graphs, sizing_mode='scale_width')) |
| 1 | # calculate total cases |
| 2 | graphs = [] |
| 3 | max_total_cases = conf_cases['total_cases_relative'].max() |
| 4 | for canton in all_cantons: |
| 5 | update_for_canton = conf_cases.loc[conf_cases['abbreviation_canton_and_fl'] == canton] |
| 6 | |
| 7 | total_cases_graph = figure(title=canton, y_axis_label='total cases', y_range=[0, max_total_cases], x_axis_type='datetime') |
| 8 | total_cases_graph.line(update_for_canton['date'], update_for_canton['total_cases_relative'], line_width=1) |
| 9 | graphs.append(total_cases_graph) |
| 10 | |
| 11 | show(create_grid(graphs, sizing_mode='scale_width')) |
| 1 | worldwide_pretty |
| Date | Country_Region | New_Cases | Total_Cases | New_Deaths | |
|---|---|---|---|---|---|
| 0 | 2020-02-24 | Afghanistan | NaN | NaN | NaN |
| 1 | 2020-02-25 | Afghanistan | 0.0 | 0.0 | 0.0 |
| 2 | 2020-02-26 | Afghanistan | 0.0 | 0.0 | 0.0 |
| 3 | 2020-02-27 | Afghanistan | 0.0 | 0.0 | 0.0 |
| 4 | 2020-02-28 | Afghanistan | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... |
| 78460 | 2020-03-12 | occupied Palestinian territory | 0.0 | -25.0 | 0.0 |
| 78461 | 2020-03-14 | occupied Palestinian territory | 0.0 | -25.0 | 0.0 |
| 78462 | 2020-03-15 | occupied Palestinian territory | 0.0 | -25.0 | 0.0 |
| 78463 | 2020-03-16 | occupied Palestinian territory | 0.0 | -25.0 | 0.0 |
| 78464 | 2020-03-17 | occupied Palestinian territory | 0.0 | -25.0 | 0.0 |
78457 rows × 5 columns
| 1 | swiss_pretty |
| date | abbreviation_canton_and_fl | new_cases | total_cases | new_deaths | |
|---|---|---|---|---|---|
| 2346 | 2020-06-01 | BL | 1 | 1.0 | 0.0 |
| 2347 | 2020-06-01 | FR | 0 | 0.0 | 0.0 |
| 2348 | 2020-06-01 | GE | 1 | 1.0 | 0.0 |
| 2349 | 2020-06-01 | GR | 0 | 0.0 | 0.0 |
| 2350 | 2020-06-01 | JU | 0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... |
| 10632 | 2021-04-28 | SZ | 46 | 10748.0 | 2.0 |
| 10633 | 2021-04-28 | TG | 0 | 17161.0 | 0.0 |
| 10634 | 2021-04-28 | AI | 9 | 963.0 | 0.0 |
| 10635 | 2021-04-28 | BS | 73 | 10898.0 | 1.0 |
| 10636 | 2021-04-28 | ZH | 362 | 102408.0 | 0.0 |
7768 rows × 5 columns
| 1 |